Covariates from the phenotype data | Results adjusted by age, sex and education | Function version March 2023.
## Create table
createDT <- function(DF, caption="", scrollY=500){
data <- DT::datatable(DF, caption=caption,
extensions = 'Buttons',
options = list( dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print'),
scrollY = scrollY, scrollX=T, scrollCollapse = T, paging = F,
columnDefs = list(list(className = 'dt-center', targets = "_all"))
)
)
return(data)
}
## Run regression tests
run_module_trait_association <- function(data4linear_reg, # Matrix with module eigengenes (predictor)
phenotype_dt, # Matrix with covariates (outcome + covariates)
pheno_list, # List of phenotypes to be tested (with classes = binomial or gaussian)
covariates = c("age_death","msex", "educ") # List of covariates to be adjusted
){
if (!require("lme4")) install.packages("lme4")
if (!require("lmerTest")) install.packages("lmerTest")
if (!require("performance")) install.packages("performance")
library(lme4)
library(lmerTest)
library(performance)
outcome = names(pheno_list)
outcome.family = pheno_list
# random_effect = "projid"
# avg_over_random_effect = T
matrix_rsquared = matrix(NA, nrow = length(outcome), ncol = ncol(mod_average)) #Number of modules
matrix_pvalue = matrix(NA, nrow = length(outcome), ncol = ncol(mod_average))
for (x in 1:length(pheno_list)){
for (y in 1:ncol(mod_average)){
outcome_pheno = outcome[x]
outcome_type = outcome.family[x]
dat4test_1 = setNames(as.data.frame(cbind(phenotype_dt[,outcome_pheno],data4linear_reg[,y])), c("outcome","predictor"))
if(!is.null(covariates)){
dat4test_2 = phenotype_dt[,covariates,drop=F]
dat4test = na.omit(cbind(dat4test_1, dat4test_2))
formula_string = as.formula(paste0("outcome ~ predictor + ", paste(covariates, collapse = " + ")))
print(paste0("Testing (n=",nrow(dat4test),"): ", outcome_pheno, " ~ ", names(data4linear_reg)[y], " + ", paste(covariates, collapse = " + ")))
}else{
dat4test = na.omit(dat4test_1)
formula_string = as.formula(paste0("outcome ~ predictor"))
print(paste0("Testing (n=",nrow(dat4test),"): ", outcome_pheno, " ~ ", names(data4linear_reg)[y]))
}
if (outcome_type == "gaussian"){
mod.obj0 = lm(formula_string, dat4test, na.action = "na.exclude")
matrix_rsquared[x,y] <- summary( mod.obj0 )$adj.r.squared
matrix_pvalue[x,y] <- summary( mod.obj0 )$coefficients["predictor","Pr(>|t|)"] #To insert pvalues in the heatmap
}
if (outcome_type == "binomial"){
dat4test$outcome = as.factor(dat4test$outcome)
mod.obj1 = glm(formula_string, dat4test, family = binomial, na.action = "na.exclude")
matrix_rsquared[x,y] <- 1 - mod.obj1$deviance/mod.obj1$null.deviance # Pseudo r-squared
matrix_pvalue[x,y] <- coef(summary(mod.obj1))["predictor",'Pr(>|z|)']
}
}
}
rownames(matrix_rsquared) = names(pheno_list)
rownames(matrix_pvalue) = names(pheno_list)
colnames(matrix_rsquared) = colnames(data4linear_reg)
colnames(matrix_pvalue) = colnames(data4linear_reg)
matrix_pvalue_df = setNames(reshape2::melt(matrix_pvalue), c("phenotype","module","nom_p"))
matrix_rsquared_df = setNames(reshape2::melt(matrix_rsquared), c("phenotype","module","rsquared"))
all_stats_df = matrix_pvalue_df %>% left_join(matrix_rsquared_df) %>% arrange(nom_p)
return(list(all_stats_df = all_stats_df, matrix_rsquared = matrix_rsquared, matrix_pvalue = matrix_pvalue))
}
plot_module_trait_association_heatmap <- function(res_test, to_show, show_only_significant = F, signif_cutoff = c("***","**","*")){
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
matrix_rsquared = res_test$matrix_rsquared
matrix_pvalue = res_test$matrix_pvalue
matrix_rsquared_to_plot = matrix_rsquared[,to_show]
matrix_pvalue_to_plot = matrix_pvalue[,to_show]
# Adjust P-values by each phenotype separately.
adj_matrix_pvalue_to_plot = matrix_pvalue_to_plot
for(i in 1:nrow(matrix_pvalue_to_plot)){
message("Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)")
adj_matrix_pvalue_to_plot[i,] = p.adjust(matrix_pvalue_to_plot[i,], method = "bonferroni")
}
adj_matrix_pvalue_to_plot.signif <- symnum(adj_matrix_pvalue_to_plot, corr = FALSE, na = FALSE,
cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
symbols = c("***", "**", "*", ".", " "))
log_matrix_pvalue_to_plot = -log10(matrix_pvalue_to_plot)
dimnames(log_matrix_pvalue_to_plot) = dimnames(log_matrix_pvalue_to_plot)
if(show_only_significant){
if(is.numeric(signif_cutoff)){
to_keep = colSums(adj_matrix_pvalue_to_plot <= signif_cutoff) > 0
}else{
to_keep = rep(F,ncol(adj_matrix_pvalue_to_plot.signif))
for(cut_i in signif_cutoff){
to_keep = to_keep | colSums(adj_matrix_pvalue_to_plot.signif == cut_i) > 0 # change for the significance you want
}
}
log_matrix_pvalue_to_plot = log_matrix_pvalue_to_plot[,to_keep]
adj_matrix_pvalue_to_plot.signif = adj_matrix_pvalue_to_plot.signif[,to_keep]
}
matrix_pvalue_to_plot_labels = formatC(log_matrix_pvalue_to_plot, format = "f", digits = 2)
log_matrix_pvalue_to_plot_t = t(log_matrix_pvalue_to_plot)
# Colored by -log10(pvalue)
# Numbers inside cell = -log10(pvalue): nominal
Heatmap(log_matrix_pvalue_to_plot_t, name = "-log10(P-value)",
cell_fun = function(j, i, x, y, width, height, fill) {
if(as.character(t(adj_matrix_pvalue_to_plot.signif)[i,j]) == " "){
grid.text( t(matrix_pvalue_to_plot_labels)[i,j], x, y,
gp = gpar(fontsize = 8))
}else{
grid.text(paste0( t(matrix_pvalue_to_plot_labels)[i,j],"\n", t(adj_matrix_pvalue_to_plot.signif)[i,j] ), x, y,
gp = gpar(fontsize = 8))
}
},
col = colorRampPalette(rev(brewer.pal(n = 7, name ="RdYlBu")))(100),
row_names_side = "left", show_row_names = T,
cluster_rows = F, cluster_columns = F,
column_names_gp = gpar(fontsize = 9),
row_names_gp = gpar(fontsize = 9),
show_row_dend = F, show_column_dend = F, rect_gp = gpar(col = "white", lwd = 1))
}pheno_list = c(# "cogn_global_lv"="gaussian", # Global cognitive function - Average of 19 tests
"cogng_demog_slope"="gaussian", # Cognitive decline slope. Remove the effect of demog
"cogng_path_slope"="gaussian", # Resilience, removed the effect of path + demog
"tangles_sqrt"="gaussian", # Tangle density - Mean of 8 brain regions
# "nft_sqrt"="gaussian", # Neurofibrillary tangle summary based on 5 regions
"amyloid_sqrt"="gaussian", # Overall amyloid level - Mean of 8 brain regions
# "plaq_n_sqrt"="gaussian", # Neuritic plaque summary based on 5 regions
# "plaq_d_sqrt"="gaussian", # Diffuse plaque summary based on 5 regions
# "caa_4gp"="gaussian", # Cerebral amyloid angiopathy - 4 stages
"gpath"="gaussian", # Global burden of AD pathology based on 5 regions
"tdp_cs_6reg"="gaussian", # TDP-43, 6 region severity summary
# "parksc_lv"="gaussian", # Global parkinsonian summary score
# "cpd_lv"="binomial", # Clinical Parkinson's Disease
# "dxpark_status"="binomial", # Final Clinical Dx - Hx of Parkinson's disease/Parkinsonism (excl 9)
"ad_dementia_status"="binomial", # Clinical AD # CT = MCI + NCI
"ci_status"="binomial" # AD+ MCI vs NCI
# "tdp_43_binary"="binomial",
# "ci_num2_gct"="binomial", # Cerebral Infarctions - Binary - Gross-Chronic-Any Location
# "ci_num2_mct"="binomial", # Cerebral Infarctions - Binary - Micro-Chronic-Any Location
# "arteriol_scler"="gaussian", # Arteriolosclerosis - 4 stages
# "cvda_4gp2"="gaussian", # Cerebral Atherosclerosis Rating - 4 levels (None - severe)
# "vasc_3dis_sum_lv"="gaussian", # Cumulative vascular disease burden - Average of 3 items (ROS/MAP/MARS)
# "vasc_risks_sum_lv"="gaussian", # Cumulative vascular disease risk factors - Average of 3 items
# "CR_slope_lpm"="gaussian",
# "CR_slope_lmm"="gaussian",
# "CR_mean_level_lpm"="gaussian",
# "CR_mean_level_lmm"="gaussian",
# "age_death"="gaussian",
# "msex"="binomial"
)net_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_net_MF/"
work_dir = "/pastel/Github_scripts/SpeakEasy_dlpfc/bulk_dlpfc/assoc_analysis/"Same covariates tested for the ST and SN networks. Show all modules. Input: Average expression by module.
Numbers and colors : -log10(nominal pvalue)
cutpoints:
< 0.001 = ***
0.01 = **
0.05 = *
0.1 = .
1 = ” ”
### Modules from SE
modules_file = read.table(paste0(net_dir, "geneBycluster.txt"), header = T)
modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")
### eigengenes and average expression
load(paste0(net_dir, "lv3_moduleEigengenes.Rdata"))
mod_eigengene = lv3_moduleEigengenes$eigengenes
mod_average = lv3_moduleEigengenes$averageExpr
mod_average$projid = gsub("(.*)_(.*)", "\\2", rownames(mod_eigengene)) #get the projid to match with phenotype data
rownames(mod_average) = mod_average$projid
mod_average$projid = NULL
data4linear_reg <- mod_average # or mod_eigengene
load("/pastel/projects/spatial_t/pseudo_bulk/phenotypes.RData") # phenotypes
phenotype_dt = phenotypes[match(rownames(data4linear_reg), phenotypes$projid),]
all(rownames(data4linear_reg) == phenotype_dt$projid) # Must be TRUE. Match IDs
res_test = run_module_trait_association(data4linear_reg, phenotype_dt, pheno_list, covariates = c("age_death","msex", "educ"))
matrix_rsquared = res_test$matrix_rsquared
matrix_pvalue = res_test$matrix_pvalue
### Get modules >= 30 nodes to show:
to_show = paste0("AE",as.character( modules_size$module[modules_size$n_nodes >= 30]))
# save(res_test, file = paste0(work_dir, "results_lr_bulk_MF.Rdata"))
plot_module_trait_association_heatmap(res_test, to_show)Top result by covariate.
Threshold: At least one module with adjusted pvalue < 0.05.
Method: Bonferroni by column.
plot_module_trait_association_heatmap(res_test, to_show, show_only_significant = T, signif_cutoff = 0.05)## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
# pdf(paste0(net_dir, "bulk_DLPFC_topmod.pdf"), width = 5, height = 4)
plot_module_trait_association_heatmap(res_test, to_show, show_only_significant = T, signif_cutoff = 0.000001)## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] RColorBrewer_1.1-3 circlize_0.4.16 ComplexHeatmap_2.15.4
## [4] performance_0.12.2 lmerTest_3.1-3 lme4_1.1-35.1
## [7] Matrix_1.6-5 lubridate_1.9.3 forcats_1.0.0
## [10] stringr_1.5.1 dplyr_1.1.4 purrr_1.0.2
## [13] readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
## [16] tidyverse_2.0.0 ggpubr_0.6.0 gplots_3.1.3.1
## [19] broom_1.0.6 ggplot2_3.5.1 ggeasy_0.1.4
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-153 bitops_1.0-8 matrixStats_1.3.0
## [4] insight_0.20.2 doParallel_1.0.17 numDeriv_2016.8-1.1
## [7] tools_4.1.2 backports_1.5.0 bslib_0.8.0
## [10] DT_0.33 utf8_1.2.4 R6_2.5.1
## [13] KernSmooth_2.23-20 BiocGenerics_0.40.0 colorspace_2.1-1
## [16] GetoptLong_1.0.5 withr_3.0.0 tidyselect_1.2.1
## [19] compiler_4.1.2 cli_3.6.3 Cairo_1.6-2
## [22] sass_0.4.9 caTools_1.18.2 scales_1.3.0
## [25] digest_0.6.36 minqa_1.2.7 rmarkdown_2.27
## [28] pkgconfig_2.0.3 htmltools_0.5.8.1 highr_0.11
## [31] fastmap_1.2.0 htmlwidgets_1.6.4 rlang_1.1.4
## [34] GlobalOptions_0.1.2 rstudioapi_0.16.0 shape_1.4.6.1
## [37] jquerylib_0.1.4 generics_0.1.3 jsonlite_1.8.8
## [40] crosstalk_1.2.1 gtools_3.9.5 car_3.1-2
## [43] magrittr_2.0.3 S4Vectors_0.32.4 Rcpp_1.0.13
## [46] munsell_0.5.1 fansi_1.0.6 abind_1.4-5
## [49] lifecycle_1.0.4 stringi_1.8.4 yaml_2.3.10
## [52] carData_3.0-5 MASS_7.3-60.0.1 plyr_1.8.9
## [55] parallel_4.1.2 crayon_1.5.3 lattice_0.20-45
## [58] splines_4.1.2 hms_1.1.3 magick_2.8.4
## [61] knitr_1.48 pillar_1.9.0 boot_1.3-28
## [64] rjson_0.2.21 ggsignif_0.6.4 stats4_4.1.2
## [67] reshape2_1.4.4 codetools_0.2-18 glue_1.7.0
## [70] evaluate_0.24.0 vctrs_0.6.5 png_0.1-8
## [73] nloptr_2.1.1 tzdb_0.4.0 foreach_1.5.2
## [76] gtable_0.3.5 clue_0.3-65 cachem_1.1.0
## [79] xfun_0.46 rstatix_0.7.2 iterators_1.0.14
## [82] IRanges_2.28.0 cluster_2.1.2 timechange_0.3.0